package net.oov.identify.support;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers for locating the text files of a corpus directory and
 * counting the lines they contain.
 */
public class CorpusManager {

	/**
	 * Lists the regular files directly inside {@code dirName} whose name ends
	 * with {@code "txt"}.
	 *
	 * @param dirName path of the directory to scan (not recursive)
	 * @return one entry per matching file, built as
	 *         {@code dirName + File.separator + fileName}; an empty list when
	 *         {@code dirName} does not exist, is not a directory, or cannot be
	 *         read
	 */
	public static List<String> getCorpusTextPath(String dirName){

		List<String> textPath = new ArrayList<String>();

		// File.listFiles() returns null (rather than an empty array) when the
		// path is not a directory or an I/O error occurs.
		File[] entries = new File(dirName).listFiles();
		if(entries == null){
			return textPath;
		}

		for(File entry:entries){
			// Skip sub-directories whose name happens to end in "txt": they
			// cannot be opened as text files by getTotalLineNum.
			if(entry.isFile() && entry.getName().endsWith("txt")){
				textPath.add(dirName+File.separator+entry.getName());
			}
		}

		return textPath;
	}

	/**
	 * Prints the total number of lines found in the text files of the
	 * {@code taobao_products} directory (resolved against the working
	 * directory).
	 *
	 * @param args ignored
	 */
	public static void main(String[] args){

		String dirName = "taobao_products";

		try {
			// The original source recorded 171233 as the value printed here.
			System.out.println(getTotalLineNum(getCorpusTextPath(dirName)));
		} catch (IOException e) {
			e.printStackTrace();
		}

	}

	/**
	 * Counts the lines of every file in {@code textPath}, decoding each file
	 * as UTF-8, and returns the sum.
	 *
	 * @param textPath paths of the files to read
	 * @return total number of lines across all files; {@code 0} for an empty
	 *         list
	 * @throws IOException if a file cannot be opened or read
	 */
	public static long getTotalLineNum(List<String> textPath) throws IOException{
		long totalLineNum = 0;
		for(String path:textPath){
			FileInputStream in = new FileInputStream(path);
			try {
				BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
				while(br.readLine()!=null){
					totalLineNum++;
				}
			} finally {
				// Release the file handle even when reading fails; closing the
				// underlying stream is enough because the readers only wrap it.
				in.close();
			}
		}
		return totalLineNum;

	}

}
